Project Visualizations

Here are some sample plots using the voting data. Any thoughts about what this tells us for analysis?

Setup

Note the setup variables below. These are supposed to control all the plots. Sometimes they do; sometimes they don't. This still needs to be cleaned up, but the goal is uniformity across all the plots.

For fonts, I tried to use ‘sans’ which should pick the system sans serif font for whatever OS is running. In the plotly plot I had to pick a specific font or it defaults to Times New Roman. I picked Arial, but we should look at including Helvetica for macOS users.

# --- Load libraries ---
library(ggplot2)
library(ggforce)
library(dplyr)
library(knitr)
library(readr)
library(sf)
library(tigris)
library(plotly)

# --- Shared Civic Triangle colors, referenced by the figures below ---
# Surfaces and boundaries
fill_col   <- "#ffffff"   # background fill (white)
border_col <- "#e6eef5"   # county border strokes (light blue-grey)
# Primary ink
line_col   <- "#3a5f7d"   # roads and outlines (blue-grey)
text_col   <- "#2f3b44"   # titles and other text
# Secondary ink (line and text currently share one hex value)
alt_line   <- "#536cae"   # secondary line color
alt_text   <- "#536cae"   # secondary text color

# --- Read the Texas county-level voting file ---
turnout <- read_csv("tx_county_voting_data_2020.csv")

# --- Turnout as a percentage: votes cast per 100 registered voters ---
turnout <- mutate(
  turnout,
  Turnout_Rate = (Total_Votes_Cast / Registered_Voters) * 100
)

# --- Map education categories to years of schooling ---
# Each name is an AMRZE* column of `turnout`; the value is the number of
# years of schooling assigned to that category. The columns are treated
# below as population counts (they are summed into total_edu_pop).
years_map <- c(
  AMRZE002 = 0,  AMRZE003 = 0,  AMRZE004 = 0,
  AMRZE005 = 1,  AMRZE006 = 2,  AMRZE007 = 3,  AMRZE008 = 4,
  AMRZE009 = 5,  AMRZE010 = 6,  AMRZE011 = 7,  AMRZE012 = 8,
  AMRZE013 = 9,  AMRZE014 = 10, AMRZE015 = 11, AMRZE016 = 12,
  AMRZE017 = 12, AMRZE018 = 12, AMRZE019 = 13, AMRZE020 = 14,
  AMRZE021 = 14, AMRZE022 = 16, AMRZE023 = 18,
  AMRZE024 = 19, AMRZE025 = 20
)

# Only the mapped categories that are actually present in the data.
edu_cols <- intersect(names(turnout), names(years_map))

# --- Compute mean years of schooling and normalized education index (0–1) ---
# The year weights must be applied per COLUMN. Multiplying a data frame by a
# plain vector recycles the vector down the rows (column-major order), which
# pairs counts with the wrong year values. The counts are therefore converted
# to a matrix and weighted column-wise with sweep(..., MARGIN = 2).
turnout <- turnout %>%
  mutate(
    total_edu_pop = rowSums(across(all_of(edu_cols)), na.rm = TRUE),
    mean_years = rowSums(
      sweep(
        as.matrix(across(all_of(edu_cols))),
        MARGIN = 2,
        STATS = unname(years_map[edu_cols]),
        FUN = `*`
      ),
      na.rm = TRUE
    ) / total_edu_pop,
    # 20 is the largest value in years_map, so the index spans 0–1.
    edu_index = (mean_years - 0) / (20 - 0)
  )

# --- Composite civic vitality score: average of turnout and education ---
# Turnout_Rate is a percentage, so it is rescaled to 0–1 before it is
# averaged with edu_index.
turnout <- mutate(
  turnout,
  vote_edu = (Turnout_Rate/100 + edu_index) / 2
)

# --- Show the leading rows as a sanity check ---
head(turnout)
# A tibble: 6 × 34
  County    COG    Registered_Voters Total_Votes_Cast AMRZE001 AMRZE002 AMRZE003
  <chr>     <chr>              <dbl>            <dbl>    <dbl>    <dbl>    <dbl>
1 Anderson  ETCOG              29274            19227    42328      494        0
2 Andrews   PBDC               10272             5863    10944      277        0
3 Angelina  DETCOG             53166            34574    57331     1664       16
4 Aransas   CCRPC              18306            12290    17811      247        0
5 Archer    NCTCOG              6538             4796     6192       33        0
6 Armstrong PHRC                1498             1112     1338       21        4
# ℹ 27 more variables: AMRZE004 <dbl>, AMRZE005 <dbl>, AMRZE006 <dbl>,
#   AMRZE007 <dbl>, AMRZE008 <dbl>, AMRZE009 <dbl>, AMRZE010 <dbl>,
#   AMRZE011 <dbl>, AMRZE012 <dbl>, AMRZE013 <dbl>, AMRZE014 <dbl>,
#   AMRZE015 <dbl>, AMRZE016 <dbl>, AMRZE017 <dbl>, AMRZE018 <dbl>,
#   AMRZE019 <dbl>, AMRZE020 <dbl>, AMRZE021 <dbl>, AMRZE022 <dbl>,
#   AMRZE023 <dbl>, AMRZE024 <dbl>, AMRZE025 <dbl>, Turnout_Rate <dbl>,
#   total_edu_pop <dbl>, mean_years <dbl>, edu_index <dbl>, vote_edu <dbl>

Figure 1

library(ggplot2)
library(ggforce)

# --- Corners of a unit-side equilateral triangle, one per civic dimension ---
triangle <- data.frame(
  label = c("Education", "Health", "Voter Turnout"),
  x     = c(0, 1, 0.5),
  y     = c(0, 0, sqrt(3) / 2)
)

# --- Directed edges: apex -> right corner -> left corner -> apex ---
arrows <- data.frame(
  x    = c(0.5, 1, 0),
  y    = c(sqrt(3) / 2, 0, 0),
  xend = c(1, 0, 0.5),
  yend = c(0, 0, sqrt(3) / 2)
)

# --- Assemble the figure one layer at a time (draw order = code order) ---
p <- ggplot()

# Curved arrows go down first so later layers are drawn over them
p <- p + geom_curve(
  data = arrows,
  mapping = aes(x = x, y = y, xend = xend, yend = yend),
  curvature = -0.25,
  arrow = arrow(length = unit(0.3, "cm")),
  color = line_col,
  linewidth = 0.9
)

# Filled triangle outline
p <- p + geom_polygon(
  data = triangle,
  mapping = aes(x = x, y = y),
  fill = fill_col,
  color = line_col,
  linewidth = 1.2
)

# Corner nodes, drawn over the curve ends to hide gaps
p <- p + geom_point(
  data = triangle,
  mapping = aes(x = x, y = y),
  shape = 21,
  size = 8,
  stroke = 1.5,
  fill = line_col,
  color = line_col
)

# Corner labels: pushed below the two base nodes and above the apex
p <- p + geom_text(
  data = triangle,
  mapping = aes(x = x, y = y, label = label),
  vjust = c(2.3, 2.3, -3.6),
  size = 5,
  family = "sans",
  fontface = "bold",
  color = text_col
)

# Caption inside the triangle
p <- p + annotate(
  "text",
  x = 0.5, y = 0.32,
  label = "Mutual Reinforcement\nand Feedback",
  color = text_col,
  size = 4.2,
  family = "sans",
  lineheight = 1.2
)

# Blank canvas, equal aspect ratio, clipping off so labels can overhang
p <- p +
  theme_void() +
  coord_equal(xlim = c(-0.25, 1.25), ylim = c(-0.25, 1.05), clip = "off") +
  theme(
    plot.margin = margin(50, 50, 50, 50),
    plot.title = element_text(
      family = "sans", face = "bold", size = 16,
      hjust = 0.5, color = text_col
    )
  ) +
  ggtitle("The Civic Triangle: Education, Health, and Voter Turnout")

# --- Show the figure in the document ---
p

# --- Write a 9 x 8 inch, 300 dpi PNG copy ---
ggsave(
  "civic_triangle_final.png",
  plot = p,
  width = 9, height = 8, units = "in",
  dpi = 300,
  limitsize = FALSE
)
Figure 1: The Civic Triangle linking Education, Health, and Voter Turnout as mutually reinforcing dimensions of civic vitality.

Figure 2

This chart shows the county name and turnout rate on hover.

The interesting thing to note is that the exurbs have higher turnout than urban cores, but the most rural areas have the lowest turnout. Might need to do some math on this based on classifying counties as rural, urban, and suburban to see what shakes out.

# --- County polygons for Texas (downloads are cached between runs) ---
options(tigris_use_cache = TRUE)
tx_counties <- counties(state = "TX", cb = TRUE, class = "sf")
tx_counties <- mutate(tx_counties, County = gsub(" County", "", NAME))

# --- Attach turnout and split counties into five equal-sized groups ---
# Ranking on the negated rate makes group 1 the highest-turnout counties.
tx_map <- left_join(tx_counties, turnout, by = "County")
tx_map <- mutate(tx_map, quintile = ntile(-Turnout_Rate, 5))

# --- Primary roads clipped to the state outline, as plain coordinates ---
# The result is a data frame of points; the L1 column returned by
# st_coordinates() is used later as the path group.
roads <- primary_roads(class = "sf")
roads_tx <- st_intersection(roads, st_union(st_geometry(tx_counties)))
roads_tx <- st_cast(roads_tx, "LINESTRING")
roads_tx <- as.data.frame(st_coordinates(roads_tx))
roads_tx <- rename(roads_tx, lon = X, lat = Y)

# --- Five fills ordered light yellow (first) to dark red (last) ---
palette_turnout <- rev(c("#8b0000", "#d73a1f", "#f97c18", "#ffb94e", "#fff5a1"))

# --- Static choropleth, assembled layer by layer ---
p_map <- ggplot()

# County polygons filled by turnout quintile; `text` carries the hover label
# that is picked up when the plot is converted with ggplotly().
p_map <- p_map + geom_sf(
  data = tx_map,
  mapping = aes(
    fill = factor(quintile),
    text = paste0(
      "<b>", County, " County</b><br>",
      "Turnout Rate: ", round(Turnout_Rate, 1), "%"
    )
  ),
  color = border_col,
  linewidth = 0.25
)

# Semi-transparent highway overlay, one path per L1 group
p_map <- p_map + geom_path(
  data = roads_tx,
  mapping = aes(x = lon, y = lat, group = L1),
  color = adjustcolor(line_col, alpha.f = 0.4),
  linewidth = 0.4
)

# Only the two extreme legend keys get a label
p_map <- p_map + scale_fill_manual(
  values = palette_turnout,
  breaks = c("1", "2", "3", "4", "5"),
  labels = c("Highest Turnout", "", "", "", "Lowest Turnout"),
  name = NULL,
  drop = FALSE
)

# Map projection, blank theme, then palette-driven styling
p_map <- p_map +
  coord_sf() +
  theme_void() +
  theme(
    plot.background   = element_rect(fill = fill_col, color = NA),
    panel.background  = element_rect(fill = fill_col, color = NA),
    legend.background = element_rect(fill = fill_col, color = NA),
    legend.position   = "right",
    legend.direction  = "vertical",
    legend.justification = "center",
    legend.key.width  = unit(0.5, "cm"),
    legend.key.height = unit(0.8, "cm"),
    legend.text = element_text(family = "Arial", color = text_col, size = 10),
    plot.title = element_text(
      family = "Arial", face = "bold", size = 16,
      hjust = 0.5, color = text_col
    ),
    plot.margin = margin(40, 40, 40, 40)
  ) +
  ggtitle("Texas County Voter Turnout, 2020")

# --- Turn the static map into a Plotly widget; hover shows the `text` aes ---
p_map_interactive <- ggplotly(p_map, tooltip = "text")

# Fonts, backgrounds, centered title, and a legend to the right of the map
p_map_interactive <- layout(
  p_map_interactive,
  font = list(family = "Arial, Helvetica, sans-serif", color = text_col),
  paper_bgcolor = fill_col,
  plot_bgcolor = fill_col,
  title = list(
    text = "<b>Texas County Voter Turnout, 2020</b>",
    font = list(
      family = "Arial, Helvetica, sans-serif", size = 18, color = text_col
    ),
    x = 0.5, xanchor = "center"
  ),
  legend = list(
    orientation = "v",
    x = 1.02,
    y = 0.5,
    xanchor = "left",
    yanchor = "middle",
    traceorder = "normal",
    font = list(
      family = "Arial, Helvetica, sans-serif", color = text_col, size = 11
    )
  )
)

# --- Rename legend entries trace by trace ---
# Quintiles 1 and 5 get descriptive names, 2-4 get a blank name so their
# color keys stay in the legend, and any other trace name is left untouched.
p_map_interactive$x$data <- lapply(
  p_map_interactive$x$data,
  function(trace) {
    if (!is.null(trace$name)) {
      trace$name <- switch(
        trace$name,
        "1" = "Highest Turnout",
        "5" = "Lowest Turnout",
        "2" = " ",
        "3" = " ",
        "4" = " ",
        trace$name
      )
    }
    trace
  }
)

p_map_interactive
Figure 2: Interactive choropleth of 2020 Texas county voter turnout with highway overlay. Light yellow = highest turnout; dark red = lowest turnout.

Figure 3

This was mapped to see if it would be reasonable to generate the next radar charts using COGs. It would be reasonable for some areas such as Houston, DFW and San Antonio, but not for all regions.

#| label: fig-texas-cog-map
#| fig-cap: "Texas Councils of Governments (COGs) with major highways shown in blue-grey. Boundaries follow official regional divisions used for regional planning and coordination."
#| fig-width: 9
#| fig-height: 8
#| echo: TRUE
#| message: FALSE
#| warning: FALSE

# --- Civic Triangle palette (same values as the global setup) ---
text_col   <- "#2f3b44"   # dark blue-grey text
line_col   <- "#3a5f7d"   # blue-grey lines and accents
border_col <- "#e6eef5"   # light blue-grey for boundaries
fill_col   <- "#ffffff"   # white background

# --- Texas county polygons with a County column to join on ---
options(tigris_use_cache = TRUE)
tx_counties <- counties(state = "TX", cb = TRUE, class = "sf")
tx_counties <- mutate(tx_counties, County = gsub(" County", "", NAME))
Retrieving data for the year 2024
# The turnout table already carries the COG column, so it is reused as-is
turnout_with_cog <- turnout

# Attach COG assignments to the county geometries and keep only the
# counties that received one
tx_cog_map <- left_join(tx_counties, turnout_with_cog, by = "County")
tx_cog_map <- filter(tx_cog_map, !is.na(COG))
tx_cog_map <- st_as_sf(tx_cog_map)

# --- Load and clip Texas primary roads ---
roads <- primary_roads(class = "sf")
Retrieving data for the year 2024
# Clip the road network to the dissolved outline of all Texas counties
roads_tx <- tx_counties %>%
  st_geometry() %>%
  st_union() %>%
  st_intersection(x = roads, y = .)
Warning: attribute variables are assumed to be spatially constant throughout
all geometries
# --- Assign a distinct qualitative color palette for 24 COGs ---
# Choose distinct hues, all harmonious with the Civic palette.
# Every entry must be unique, otherwise two COGs get the same fill and cannot
# be told apart on the map. Two earlier entries were replaced for that
# reason: a repeated "#e15759" (now "#17becf") and "#b07aa1", which was one
# unit away from "#af7aa1" (now "#6b6ecf").
cog_palette <- c(
  "#e15759", "#f28e2b", "#edc948", "#59a14f", "#76b7b2", "#4e79a7",
  "#9c755f", "#af7aa1", "#ff9da7", "#c49c94", "#8cd17d", "#b6992d",
  "#499894", "#86bcb6", "#fabfd2", "#17becf", "#79706e", "#bab0ab",
  "#d37295", "#d4a6c8", "#a0cbe8", "#f1ce63", "#6b6ecf", "#ffbe7d"
)

# --- COG map, assembled layer by layer ---
p_cog <- ggplot()

# County polygons filled by their COG
p_cog <- p_cog + geom_sf(
  data = tx_cog_map,
  mapping = aes(fill = COG),
  color = border_col,
  linewidth = 0.3
)

# Semi-transparent highway overlay
p_cog <- p_cog + geom_sf(
  data = roads_tx,
  color = adjustcolor(line_col, alpha.f = 0.5),
  linewidth = 0.4
)

# Qualitative fills taken from cog_palette
p_cog <- p_cog + scale_fill_manual(
  values = cog_palette,
  name = "Council of Government (COG)"
)

# Map projection, blank theme, then palette-driven styling
p_cog <- p_cog +
  coord_sf() +
  theme_void() +
  theme(
    plot.background   = element_rect(fill = fill_col, color = NA),
    panel.background  = element_rect(fill = fill_col, color = NA),
    legend.background = element_rect(fill = fill_col, color = NA),
    legend.position   = "right",
    legend.direction  = "vertical",
    legend.justification = "center",
    legend.text = element_text(family = "Arial", color = text_col, size = 9),
    legend.title = element_text(
      family = "Arial", face = "bold", color = text_col, size = 10
    ),
    plot.title = element_text(
      family = "Arial", face = "bold", size = 16,
      hjust = 0.5, color = text_col
    ),
    plot.margin = margin(40, 40, 40, 40)
  ) +
  ggtitle("Texas Councils of Governments (COGs)")

p_cog

Table 1: Abbreviations and full names for Texas Councils of Governments (COGs).
Abbreviations and full names for Texas Councils of Governments (COGs).
**Abbreviation** **Full Name / Description**
AAMPO Alamo Area Metropolitan Planning Organization (also serves as the COG)
ACOG Ark-Tex Council of Governments
BVRG Brazos Valley Council of Governments
CAPCOG Capital Area Council of Governments
CCRPC Coastal Bend Council of Governments (formerly Corpus Christi Regional Planning Commission)
CEN-TEX Central Texas Council of Governments
CTCOG Central Texas Council of Governments (same as CEN-TEX, used for clarity)
DETCOG Deep East Texas Council of Governments
ETCOG East Texas Council of Governments
ETC East Texas Council (of Governments)
GCRPC Golden Crescent Regional Planning Commission
H-GAC Houston-Galveston Area Council
LGRC Lower Rio Grande Valley Development Council
NCTCOG North Central Texas Council of Governments
NORTEX Nortex Regional Planning Commission
PBDC Permian Basin Regional Planning Commission
PHRC Panhandle Regional Planning Commission
RPCF Rio Grande Council of Governments (formerly Region 10 Planning Commission)
SETRPC South East Texas Regional Planning Commission
SPPDC South Plains Association of Governments
TML Texas Department of Transportation’s TxDOT Lubbock District (no formal COG; used for this regional grouping)
WCTCOG West Central Texas Council of Governments

Figure 4

These are radar charts of Turnout_Rate for three COGs: NCTCOG (DFW), H-GAC (Houston), and AAMPO (San Antonio). These charts might be interesting for a pick menu in Shiny.

library(fmsb)
library(dplyr)
library(scales)

# --- COG abbreviations that get a radar chart ---
target_cogs <- c(
  "NCTCOG",
  "H-GAC",
  "AAMPO"
)

# --- Function to draw a radar chart for a single COG ---
# Draws one radar chart with one axis per county of `cog_name`, ordered by
# descending Turnout_Rate. The axis range runs from the rounded-down minimum
# to the rounded-up maximum turnout of those counties.
#
# Reads the globals `turnout`, `fill_col`, `line_col`, `border_col` and
# `text_col`.
#
# Args:
#   cog_name: a single COG abbreviation as stored in turnout$COG.
#
# Returns:
#   NULL (invisibly) when fewer than three counties have a turnout value,
#   after emitting a message; otherwise whatever radarchart() returns.
plot_cog_radar <- function(cog_name) {
  # Counties with a missing rate are dropped up front: a single NA would
  # otherwise turn both range limits into NA.
  cog_chart <- turnout %>%
    filter(COG == cog_name, !is.na(Turnout_Rate)) %>%
    arrange(desc(Turnout_Rate)) %>%
    select(County, Turnout_Rate)

  if (nrow(cog_chart) < 3) {
    message("Skipping ", cog_name, " - not enough counties for a radar chart")
    return(invisible(NULL))
  }

  # --- Prepare radar data ---
  max_rate <- ceiling(max(cog_chart$Turnout_Rate))
  min_rate <- floor(min(cog_chart$Turnout_Rate))
  if (max_rate == min_rate) {
    # Every county has the same whole-number rate; widen the range so the
    # axis does not have zero length.
    max_rate <- min_rate + 1
  }

  # radarchart() expects row 1 = axis maximum, row 2 = axis minimum,
  # row 3 onward = the series to draw.
  radar_data <- data.frame(
    rbind(
      rep(max_rate, nrow(cog_chart)),
      rep(min_rate, nrow(cog_chart)),
      cog_chart$Turnout_Rate
    )
  )
  colnames(radar_data) <- cog_chart$County

  # --- Plot radar chart ---
  # Restore the caller's graphics settings on exit so that margins,
  # background and font do not leak into later base-graphics plots.
  old_par <- par(
    mar = c(2, 2, 4, 2),
    bg = fill_col,
    family = "Arial"
  )
  on.exit(par(old_par), add = TRUE)

  radarchart(
    radar_data,
    axistype = 1,
    vlabels = cog_chart$County,
    vlcex = 0.8,
    pcol = line_col,
    pfcol = adjustcolor(line_col, alpha.f = 0.15),
    plwd = 2,
    cglcol = border_col,
    cglty = 1,
    cglwd = 0.8,
    axislabcol = text_col,
    caxislabels = seq(min_rate, max_rate, length.out = 5),
    title = paste(cog_name, "County Turnout Radar (2020)")
  )
}

# --- One radar chart per COG on a 2 x 2 grid (room for up to four) ---
par(mfrow = c(2, 2))
invisible(lapply(target_cogs, plot_cog_radar))
Figure 3: Radar charts of voter turnout rates for three Texas Councils of Governments (COGs): NCTCOG, H-GAC, and AAMPO. Each plot shows county-level turnout rates.

Figure 5

This chart is not useful, so I did not fix it up, but I left it in anyway.

#| label: fig-texas-dual-axis
#| fig-cap: "Texas counties sorted by total registered voters. Bars show voter turnout percentage; the red line shows total registered voters (right axis)."
#| fig-width: 12
#| fig-height: 7
#| echo: TRUE
#| message: FALSE
#| warning: FALSE

library(ggplot2)
library(dplyr)
library(scales)

# --- Shared builder for the dual-axis charts in this section ---
# Bars show Turnout_Rate on the left axis. A line with points shows
# Registered_Voters rescaled onto that same axis, and a secondary (right)
# axis converts the rescaled values back to voter counts.
#
# Reads the globals `line_col`, `border_col`, `text_col` and `fill_col`.
#
# Args:
#   data:        data frame with County (factor, already in display order),
#                Turnout_Rate and Registered_Voters.
#   title:       plot title.
#   line_color:  color of the registered-voter line and points.
#   axis_color:  color of the right-hand axis text and title.
#   x_text_size: font size of the rotated county labels.
#   point_size:  size of the registered-voter points.
#
# Returns:
#   A ggplot object (auto-printed when called at top level).
plot_dual_axis <- function(data, title, line_color,
                           axis_color = line_color,
                           x_text_size = 8, point_size = 2) {
  # Scalars that map voter counts onto the turnout axis and back.
  # na.rm = TRUE keeps one missing county from blanking the whole line.
  max_rate   <- max(data$Turnout_Rate, na.rm = TRUE)
  max_voters <- max(data$Registered_Voters, na.rm = TRUE)

  ggplot(data, aes(x = County)) +

    # --- Left axis: Turnout percentage (bars) ---
    geom_col(aes(y = Turnout_Rate),
             fill = adjustcolor(line_col, alpha.f = 0.6),
             color = border_col,
             width = 0.6) +

    # --- Right axis: Registered voters (scaled line) ---
    # The largest county is scaled to the height of the tallest bar.
    geom_line(aes(y = Registered_Voters / max_voters * max_rate),
              color = line_color, linewidth = 1.1, group = 1) +
    geom_point(aes(y = Registered_Voters / max_voters * max_rate),
               color = line_color, size = point_size) +

    # --- Axes ---
    scale_y_continuous(
      name = "Voter Turnout (%)",
      sec.axis = sec_axis(
        # Inverse of the scaling used for the line and points above
        function(y) y / max_rate * max_voters,
        name = "Total Registered Voters",
        labels = label_comma()
      )
    ) +
    scale_x_discrete(expand = expansion(add = 0.5)) +

    # --- Theme ---
    theme_minimal(base_family = "Arial") +
    theme(
      axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1,
                                 size = x_text_size, color = text_col),
      axis.text.y.left  = element_text(color = text_col),
      axis.text.y.right = element_text(color = axis_color),
      axis.title.y.left  = element_text(color = text_col, face = "bold"),
      axis.title.y.right = element_text(color = axis_color, face = "bold"),
      panel.grid.major.x = element_blank(),
      panel.grid.minor = element_blank(),
      plot.title = element_text(family = "Arial", face = "bold",
                                hjust = 0.5, size = 16, color = text_col),
      plot.background = element_rect(fill = fill_col, color = NA),
      panel.background = element_rect(fill = fill_col, color = NA),
      plot.margin = margin(30, 30, 30, 30)
    ) +
    ggtitle(title)
}

# --- Sort all Texas counties by registered voters ---
tx_dual <- turnout %>%
  arrange(desc(Registered_Voters)) %>%
  mutate(County = factor(County, levels = County))

# --- Build dual-axis chart (red line, smaller labels for the full list) ---
plot_dual_axis(
  tx_dual,
  title = "Texas County Voter Turnout vs. Registered Voters (2020)",
  line_color = "#d73a1f",
  x_text_size = 7,
  point_size = 1.8
)

#| label: fig-top15-dual-axis
#| fig-cap: "Top 15 Texas counties by registered voters. Bars show turnout percentage; blue-grey line shows total registered voters (right axis)."
#| fig-width: 10
#| fig-height: 6
#| echo: TRUE
#| message: FALSE
#| warning: FALSE

# --- 15 largest counties by registered voters ---
tx_top15 <- turnout %>%
  arrange(desc(Registered_Voters)) %>%
  slice(1:15) %>%
  mutate(County = factor(County, levels = County))

plot_dual_axis(
  tx_top15,
  title = "Top 15 Texas Counties by Registered Voters (2020)",
  line_color = line_col
)

# --- 15 smallest counties by registered voters ---
tx_bottom15 <- turnout %>%
  arrange(Registered_Voters) %>%
  slice(1:15) %>%
  mutate(County = factor(County, levels = County))

# Line uses the secondary line color, right axis the secondary text color
plot_dual_axis(
  tx_bottom15,
  title = "Bottom 15 Texas Counties by Registered Voters (2020)",
  line_color = alt_line,
  axis_color = alt_text
)
Figure 4: Lowest 15 Texas counties by registered voters. Bars show turnout percentage; the blue line shows total registered voters (right axis).

Figure 6

Education levels

library(ggplot2)
library(dplyr)
library(sf)
library(tigris)
library(scales)
library(plotly)

# --- Load Texas county geometries ---
options(tigris_use_cache = TRUE)
tx_counties <- counties(state = "TX", cb = TRUE, year = 2020)

# --- Prepare for join ---
# County names are upper-cased on both sides so the join ignores letter case.
# Note: this overwrites turnout$County for all code that runs afterwards.
tx_counties <- tx_counties %>%
  mutate(County = toupper(NAME))

turnout <- turnout %>%
  mutate(County = toupper(County))

# --- Join with education data ---
tx_edu <- left_join(tx_counties, turnout, by = "County")

# --- Compute quintiles for mean_years ---
# ntile() ranks ascending: quintile 1 = lowest mean_years, 5 = highest.
tx_edu <- tx_edu %>%
  mutate(mean_years_q = ntile(mean_years, 5))

# --- Build base ggplot map ---
# `text` is not a geom_sf aesthetic; it only carries the hover label used by
# ggplotly() below.
p_mean_years <- ggplot(tx_edu) +
  geom_sf(aes(fill = mean_years_q,
              text = paste0(
                "<b>", County, " County</b><br>",
                "Mean Years of Schooling: ", sprintf("%.2f", mean_years), "<br>",
                "Education Index: ", sprintf("%.2f", edu_index)
              )),
          color = border_col, linewidth = 0.2) +
  scale_fill_gradientn(
    colors = rev(brewer_pal(palette = "Greens")(5)),  # light = high, dark = low
    name = NULL,
    limits = c(1, 5),
    breaks = c(1, 5),
    # Break 1 is the lowest quintile and break 5 the highest, so the labels
    # must follow that order to match the colors.
    labels = c("Lowest Education", "Highest Education"),
    guide = guide_colorbar(
      barheight = unit(4, "cm"),
      barwidth  = unit(0.5, "cm"),
      ticks = FALSE,
      label.position = "right",
      title = NULL,
      label.theme = element_text(family = "sans", color = text_col, size = 9)
    )
  ) +
  theme_void() +
  theme(
    legend.position = "right",
    legend.justification = c(0.5, 0.5),
    plot.title = element_text(family = "sans", face = "bold",
                              size = 16, hjust = 0.5, color = text_col),
    plot.margin = margin(30, 30, 30, 30)
  ) +
  ggtitle("Mean Years of Schooling by County (Texas, 2020)")

# --- Convert to interactive Plotly map with hover ---
# Plotly passes the family string to the browser, where "sans" is not a
# recognized generic family; use the same explicit stack as the turnout map.
p_mean_years_plotly <- ggplotly(p_mean_years, tooltip = "text") %>%
  style(hoverlabel = list(
    bgcolor = "white",
    font = list(family = "Arial, Helvetica, sans-serif", color = "#333333")
  ))

# --- Display interactive map inline ---
p_mean_years_plotly
Figure 5: Mean Years of Schooling by County in Texas (2020)
# --- Write the static (ggplot) version as a 9 x 8 inch, 300 dpi PNG ---
ggsave(
  filename = "mean_years_choropleth.png",
  plot = p_mean_years,
  width = 9, height = 8, units = "in",
  dpi = 300,
  limitsize = FALSE
)

Figure 7

#| label: fig-voting-education-bars
#| fig-cap: "County-level comparison of Voter Participation and Education Index (2020)"
#| fig-width: 10
#| fig-height: 10
#| echo: TRUE
#| message: FALSE
#| warning: FALSE

library(ggplot2)
library(dplyr)

# --- Prepare data ---
# Assume `turnout` already contains Turnout_Rate and edu_index
# Sort counties by turnout descending
bar_data <- turnout %>%
  arrange(desc(Turnout_Rate)) %>%
  mutate(County = factor(County, levels = rev(County)))  # top = highest turnout

# --- Create the “double bar” style plot ---
p_double <- ggplot(bar_data) +
  # Left side: voter participation (negated so the bars grow leftwards)
  geom_col(aes(x = -Turnout_Rate, y = County),
           fill = line_col, alpha = 0.8, width = 0.8) +
  # Right side: education index, rescaled from 0–1 to 0–100
  geom_col(aes(x = edu_index * 100, y = County),
           fill = alt_line, alpha = 0.8, width = 0.8) +
  # Center line
  geom_vline(xintercept = 0, color = border_col, linewidth = 0.8) +
  # Axis and labels
  # Both sides are percentages, so the axis spans the full -100..100 range.
  # Limits derived from the data maxima were narrower than the x positions of
  # the side labels below, which made ggplot drop both labels as out of range.
  scale_x_continuous(
    name = NULL,
    limits = c(-100, 100),
    breaks = seq(-100, 100, by = 25),
    labels = function(x) abs(x)
  ) +
  labs(
    y = NULL,
    title = "Voting Participation vs. Education Index by County (Texas, 2020)",
    subtitle = "Counties sorted by Voter Participation (Highest → Lowest)"
  ) +
  theme_minimal(base_family = "sans") +
  theme(
    plot.background = element_rect(fill = fill_col, color = NA),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(color = text_col, size = 10),
    axis.title.x = element_text(color = text_col, face = "bold"),
    plot.title = element_text(color = text_col, face = "bold", size = 16, hjust = 0.5),
    plot.subtitle = element_text(color = text_col, size = 12, hjust = 0.5)
  ) +
  # Side labels sit above the top bar and are anchored to the axis ends
  # (hjust 0 / 1) so the text stays inside the panel.
  annotate("text", x = -100, y = nrow(bar_data) + 2, hjust = 0,
           label = "Voter Turnout (%)", color = line_col,
           family = "sans", fontface = "bold", size = 4.2) +
  annotate("text", x = 100, y = nrow(bar_data) + 2, hjust = 1,
           label = "Education Index (0–1 → %)", color = alt_text,
           family = "sans", fontface = "bold", size = 4.2)

# --- Display inline ---
p_double
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_text()`).
Removed 1 row containing missing values or values outside the scale range
(`geom_text()`).

# --- Write a 10 x 10 inch, 300 dpi PNG for publication use ---
ggsave(
  filename = "double_bar_voting_education.png",
  plot = p_double,
  width = 10, height = 10, units = "in",
  dpi = 300,
  limitsize = FALSE
)
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_text()`).
Removed 1 row containing missing values or values outside the scale range
(`geom_text()`).

Figure 8

library(ggplot2)
library(dplyr)

# --- Prepare data ---
# Sort counties by education index descending
bar_data_edu <- turnout %>%
  arrange(desc(edu_index)) %>%
  mutate(County = factor(County, levels = rev(County)))  # top = highest education

# --- Create the mirrored “violin-style” double bar chart ---
p_double_edu <- ggplot(bar_data_edu) +
  # Left side: voter participation (negated so the bars grow leftwards)
  geom_col(aes(x = -Turnout_Rate, y = County),
           fill = line_col, alpha = 0.8, width = 0.8) +
  # Right side: education index, rescaled from 0–1 to 0–100
  geom_col(aes(x = edu_index * 100, y = County),
           fill = alt_line, alpha = 0.8, width = 0.8) +
  # Center line
  geom_vline(xintercept = 0, color = border_col, linewidth = 0.8) +
  # Axes and scales
  # Both sides are percentages, so the axis spans the full -100..100 range.
  # Limits taken from the data maxima can be narrower than the x positions of
  # the side labels below, in which case ggplot drops the labels as out of
  # range.
  scale_x_continuous(
    name = NULL,
    limits = c(-100, 100),
    breaks = seq(-100, 100, by = 25),
    labels = function(x) abs(x)
  ) +
  labs(
    y = NULL,
    title = "Education Index vs. Voter Participation by County (Texas, 2020)",
    subtitle = "Counties sorted by Education Index (Highest → Lowest)"
  ) +
  theme_minimal(base_family = "sans") +
  theme(
    plot.background = element_rect(fill = fill_col, color = NA),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(color = text_col, size = 10),
    axis.title.x = element_text(color = text_col, face = "bold"),
    plot.title = element_text(color = text_col, face = "bold", size = 16, hjust = 0.5),
    plot.subtitle = element_text(color = text_col, size = 12, hjust = 0.5)
  ) +
  # Side labels sit above the top bar and are anchored to the axis ends
  # (hjust 0 / 1) so the text stays inside the panel.
  annotate("text", x = -100, y = nrow(bar_data_edu) + 2, hjust = 0,
           label = "Voter Turnout (%)", color = line_col,
           family = "sans", fontface = "bold", size = 4.2) +
  annotate("text", x = 100, y = nrow(bar_data_edu) + 2, hjust = 1,
           label = "Education Index (0–1 → %)", color = alt_text,
           family = "sans", fontface = "bold", size = 4.2)

# --- Display inline ---
p_double_edu

# --- Export PNG for publication use ---
ggsave("double_bar_education_voting.png", plot = p_double_edu,
       width = 10, height = 10, dpi = 300, units = "in", limitsize = FALSE)
Figure 6: County-level comparison of Education Index and Voter Participation (2020)

Figure 9

#| label: fig-vote-edu-map
#| fig-cap: "Composite Civic Vitality (Voting + Education) by County in Texas (2020)"
#| fig-width: 8
#| fig-height: 7
#| echo: TRUE
#| message: FALSE
#| warning: FALSE

library(ggplot2)
library(dplyr)
library(sf)
library(tigris)
library(scales)
library(plotly)

# --- Load Texas county geometries ---
options(tigris_use_cache = TRUE)
tx_counties <- counties(state = "TX", cb = TRUE, year = 2020)

# --- Prepare for join ---
# County names are upper-cased on both sides so the join ignores letter case.
# Note: this overwrites turnout$County for all code that runs afterwards.
tx_counties <- tx_counties %>%
  mutate(County = toupper(NAME))

turnout <- turnout %>%
  mutate(County = toupper(County))

# --- Join with composite civic data ---
tx_voteedu <- left_join(tx_counties, turnout, by = "County")

# --- Compute quintiles for the composite variable ---
# ntile() ranks ascending: quintile 1 = lowest vote_edu, 5 = highest.
tx_voteedu <- tx_voteedu %>%
  mutate(vote_edu_q = ntile(vote_edu, 5))

# --- Build ggplot choropleth ---
# `text` is not a geom_sf aesthetic (ggplot2 warns about it); it only carries
# the hover label used when the plot is converted with ggplotly().
p_voteedu <- ggplot(tx_voteedu) +
  geom_sf(aes(fill = vote_edu_q,
              text = paste0(
                "<b>", County, " County</b><br>",
                "Civic Vitality Index: ", sprintf("%.2f", vote_edu), "<br>",
                "Education Index: ", sprintf("%.2f", edu_index), "<br>",
                "Voter Turnout: ", sprintf("%.2f", Turnout_Rate)
              )),
          color = border_col, linewidth = 0.2) +
  scale_fill_gradientn(
    colors = rev(brewer_pal(palette = "Blues")(5)),  # light = high, dark = low
    name = NULL,
    limits = c(1, 5),
    breaks = c(1, 5),
    # Break 1 is the lowest quintile and break 5 the highest, so the labels
    # must follow that order to match the colors.
    labels = c("Lowest Civic Vitality", "Highest Civic Vitality"),
    guide = guide_colorbar(
      barheight = unit(4, "cm"),
      barwidth  = unit(0.5, "cm"),
      ticks = FALSE,
      label.position = "right",
      title = NULL,
      label.theme = element_text(family = "sans", color = text_col, size = 9)
    )
  ) +
  theme_void() +
  theme(
    legend.position = "right",
    legend.justification = c(0.5, 0.5),
    plot.title = element_text(family = "sans", face = "bold",
                              size = 16, hjust = 0.5, color = text_col),
    plot.margin = margin(30, 30, 30, 30)
  ) +
  ggtitle("Composite Civic Vitality (Voting + Education) by County (Texas, 2020)")
Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
: Ignoring unknown aesthetics: text
# --- Convert to interactive Plotly map ---
# Plotly passes the family string to the browser, where "sans" is not a
# recognized generic family; use the same explicit stack as the turnout map.
p_voteedu_plotly <- ggplotly(p_voteedu, tooltip = "text") %>%
  style(hoverlabel = list(
    bgcolor = "white",
    font = list(family = "Arial, Helvetica, sans-serif", color = "#333333")
  ))

# --- Display interactive map inline ---
p_voteedu_plotly
# --- Export static version if needed ---
ggsave("vote_edu_choropleth.png", plot = p_voteedu,
       width = 9, height = 8, dpi = 300, units = "in", limitsize = FALSE)

Tables: Civic Vitality Rankings

#| label: tbl-voteedu-tops
#| tbl-cap: "Top 10 Counties by Civic Vitality Index (2020)"
#| echo: TRUE
#| message: FALSE
#| warning: FALSE

library(dplyr)
library(knitr)

# --- Top 10 counties ---
# Ten highest vote_edu scores, rounded to three decimals for display.
# Rounding uses an explicit function because passing extra arguments through
# across()'s `...` is deprecated as of dplyr 1.1.0.
top10_vitality <- turnout %>%
  arrange(desc(vote_edu)) %>%
  slice_head(n = 10) %>%
  select(County, vote_edu, edu_index, Turnout_Rate) %>%
  mutate(across(c(vote_edu, edu_index, Turnout_Rate),
                function(x) round(x, digits = 3)))
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `across(c(vote_edu, edu_index, Turnout_Rate), round, 3)`.
Caused by warning:
! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
Supply arguments directly to `.fns` through an anonymous function instead.

  # Previously
  across(a:b, mean, na.rm = TRUE)

  # Now
  across(a:b, \(x) mean(x, na.rm = TRUE))
# Render the top-10 table with reader-friendly column headers
kable(
  top10_vitality,
  col.names = c("County", "Civic Vitality", "Education Index", "Turnout Rate"),
  caption = "Top 10 Counties by Civic Vitality Index (2020)"
)
Top 10 Counties by Civic Vitality Index (2020)
County Civic Vitality Education Index Turnout Rate
ROBERTS 0.719 0.629 80.882
LLANO 0.708 0.657 75.863
WILLIAMSON 0.708 0.646 76.872
ARMSTRONG 0.693 0.644 74.232
TERRELL 0.692 0.703 68.053
MOTLEY 0.691 0.622 75.902
CALLAHAN 0.683 0.667 69.907
ARCHER 0.680 0.627 73.356
ROBERTSON 0.678 0.672 68.381
JOHNSON 0.671 0.659 68.218
# --- Bottom 10 counties ---
# Ten lowest vote_edu scores, rounded to three decimals for display.
# Rounding uses an explicit function because passing extra arguments through
# across()'s `...` is deprecated as of dplyr 1.1.0.
bottom10_vitality <- turnout %>%
  arrange(vote_edu) %>%
  slice_head(n = 10) %>%
  select(County, vote_edu, edu_index, Turnout_Rate) %>%
  mutate(across(c(vote_edu, edu_index, Turnout_Rate),
                function(x) round(x, digits = 3)))

kable(bottom10_vitality,
      caption = "Bottom 10 Counties by Civic Vitality Index (2020)",
      col.names = c("County", "Civic Vitality", "Education Index", "Turnout Rate"))
Table 2: Bottom 10 Counties by Civic Vitality Index (2020)
Bottom 10 Counties by Civic Vitality Index (2020)
County Civic Vitality Education Index Turnout Rate
ZAPATA 0.386 0.303 46.918
MAVERICK 0.407 0.350 46.433
BAILEY 0.424 0.322 52.557
DEAF SMITH 0.439 0.360 51.798
TITUS 0.444 0.291 59.668
DAWSON 0.448 0.363 53.336
LYNN 0.449 0.328 56.927
ZAVALA 0.455 0.367 54.290
CASTRO 0.455 0.370 54.062
SAN PATRICIO 0.457 0.315 59.866
# --- 5 major Texas metros ---
# Names are upper-case because turnout$County has been upper-cased by the
# map sections that run before this one.
metro_counties <- c("TRAVIS", "DALLAS", "HARRIS", "BEXAR", "TARRANT")

# Rounding uses an explicit function because passing extra arguments through
# across()'s `...` is deprecated as of dplyr 1.1.0.
metro_vitality <- turnout %>%
  filter(County %in% metro_counties) %>%
  select(County, vote_edu, edu_index, Turnout_Rate) %>%
  mutate(across(c(vote_edu, edu_index, Turnout_Rate),
                function(x) round(x, digits = 3))) %>%
  arrange(desc(vote_edu))

kable(metro_vitality,
      caption = "Major Metro Area Counties: Civic Vitality Index (2020)",
      col.names = c("County", "Civic Vitality", "Education Index", "Turnout Rate"))
Table 3: Major Metro Area Counties: Civic Vitality Index (2020)
Major Metro Area Counties: Civic Vitality Index (2020)
County Civic Vitality Education Index Turnout Rate
TARRANT 0.624 0.560 68.840
HARRIS 0.620 0.578 66.148
TRAVIS 0.525 0.338 71.214
BEXAR 0.502 0.356 64.809
DALLAS 0.483 0.309 65.751

Notes

Certain elements of this preparation were enhanced with an LLM including but not limited to code restructuring, commenting, and information layout.